from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import re
import pandas as pd

def extract_data(html_code):
    """Pull article fields out of a listing page's HTML into a DataFrame.

    Each output column is produced by running one regular expression over
    ``html_code`` (with ``re.S`` so ``.`` also spans newlines) and keeping
    every captured group except the last one.

    Args:
        html_code: Page source to scan, as a string.

    Returns:
        A ``pandas.DataFrame`` with the columns '标题', '网址', '发布时间'
        and '来源', in that order.

    Raises:
        ValueError: Raised by pandas when the patterns yield different
            numbers of matches, since the columns then differ in length.
    """
    # Output column -> pattern whose single capture group supplies its values.
    column_patterns = {
        '标题': '<div class="media-heading"><a href=".*?" target="_blank">(.*?)</a>',
        '网址': '<div class="media-heading"><a href="(.*?)" target="_blank">.*?</a>',
        '发布时间': '<span class="date">(.*?)</span>',
        '来源': '<span class="source">(.*?)</span>',
    }

    columns = {}
    for column_name, pattern in column_patterns.items():
        matches = re.compile(pattern, re.S).findall(html_code)
        # The final match of every pattern is discarded.
        # NOTE(review): presumably the last hit is not a real article entry
        # on the target page -- confirm against the live markup.
        columns[column_name] = matches[:-1]
    return pd.DataFrame(columns)

# Scrape several listing pages of the '农业' section and export them to Excel.
browser = webdriver.Chrome()
try:
    browser.maximize_window()
    url = 'http://www.kepu.gov.cn/www'
    browser.get(url)
    time.sleep(3)  # fixed pause so the page can finish loading before it is queried

    # Open the '农业' entry found inside the '#sonnavhtml' element.
    browser.find_element(By.CSS_SELECTOR, '#sonnavhtml').find_element(By.LINK_TEXT, '农业').click()
    time.sleep(3)

    # Continue in the last window handle.
    # NOTE(review): presumably the click opens the section in a new tab and
    # that tab is listed last -- confirm, handle order is not guaranteed.
    handles = browser.window_handles
    browser.switch_to.window(handles[-1])

    all_data = []
    total_pages = 5
    for page in range(total_pages):
        all_data.append(extract_data(browser.page_source))
        # Only advance when another page is still to be scraped; clicking
        # after the final page wastes time and can fail if the control is gone.
        if page < total_pages - 1:
            # NOTE(review): div[6] of '#pageNumber' is presumably the
            # "next page" control -- verify against the page markup.
            browser.find_element(By.XPATH, '//*[@id="pageNumber"]/div/div[6]').click()
            time.sleep(3)
finally:
    # Always shut the browser down, even when scraping raised, so no
    # Chrome/chromedriver process is left behind.
    browser.quit()

# Merge the per-page frames and write them to a single spreadsheet.
all_data = pd.concat(all_data)
all_data.to_excel('农业资讯(多页).xlsx', index=False)
